*** Merge the TAXSIM results onto the SOI and CDW data

* Close any log left open by an earlier run (no error if none is open)
capture log close

* Session settings: no -more- pauses, new variables default to double, wide log lines
set more off
set type double
set linesize 150

log using "${logdir}/get_data/post_sas/data_merge_taxsim_results.log", replace


***********************************************************
* Load data, restrict sample, and merge on TAXSIM results *
***********************************************************

** prep tin-SSN bridge
* Builds a crosswalk holding exactly one PRIM_TIN for each PRIM_SSN.
* Only the two key variables are loaded, and only rows with a non-missing PRIM_SSN.
* -clear- lets this step run even when unsaved data are in memory (consistent
* with the other -use- calls in this file); the qualifiers follow the documented
* order: use varlist if exp using filename.
use prim_ssn prim_tin if !missing(prim_ssn) using "${fromsasdir}/result.dta" , clear
destring prim_ssn , replace
duplicates drop
* Within each PRIM_SSN keep the record whose PRIM_TIN sorts first
bysort prim_ssn (prim_tin): keep if _n==1
*NOTE: this ensures uniqueness on PRIM_SSN
tempfile tin_SSN
save `tin_SSN'


** prep CDW data
* -clear- added so this step does not depend on the data in memory having just
* been saved (consistent with the other -use- calls in this file).
use "${statadir}/pre_taxsim_data.dta" , clear
* Preserve the nonfilers' year of birth under a name that survives the drop below
gen double cdw_yob = soi_prim_yob if nonfiler==1
* Temporarily rename SOI_CDW_MATCH so the soi_ wildcard drop does not remove it
rename soi_cdw_match TEMP_match
drop *unmasked_tin nonfiler soi_*
rename TEMP_match soi_cdw_match
sort prim_tin
*NOTE: the CDW data are unique on PRIM_TIN
tempfile cdw
save `cdw'


** prep SOI data
* Attach PRIM_TIN to the SOI records through the SSN bridge, then bin AGI.
use "${statadir}/soi_2019.dta", clear
rename soi_prim_unmasked_tin prim_ssn
drop *unmasked*
sort prim_ssn
*NOTE: the 2019 SOI file is unique on PRIM_SSN
merge 1:1 prim_ssn using `tin_SSN'
	* retain only SOI records found in the bridge; the SSN is not needed afterwards
	drop if _merge!=3
	drop _merge prim_ssn

* AGI bins: 1-20 = weighted quantiles of positive AGI, 0 = zero AGI, -1 = negative AGI
xtile agi_bin = soi_agi [aw=soi_wgt] if soi_agi>0, nquantiles(20)
replace agi_bin = -1 if soi_agi<0
replace agi_bin = 0  if soi_agi==0
tabulate agi_bin [aw=soi_wgt]
tabulate agi_bin [aw=soi_wgt] if soi_agi>0

compress
sort prim_tin
tempfile soi
save `soi'


** merge TAXSIM results with SOI and CDW
* The five TAXSIM output files are combined on PRIM_TIN. Each assert() lists the
* only merge outcomes allowed for that file (see the NOTE below for the sample
* each file covers); any other outcome stops the run with an error.
use                      "${statadir}/taxsim_output_2019_1"              , clear
merge 1:1 prim_tin using "${statadir}/taxsim_output_2019_3_deps"         , nogen assert(match)
merge 1:1 prim_tin using "${statadir}/taxsim_output_2019_2_filstat"      , nogen assert(match using)
merge 1:1 prim_tin using "${statadir}/taxsim_output_2019_4_filstat_deps" , nogen assert(match)
merge 1:1 prim_tin using "${statadir}/taxsim_output_2019_5_extra_deps"   , nogen assert(match master)
*NOTE: taxsim files 1&3 only include  observations with soi_cdw_match==1
*      taxsim files 2&4 also include  observations with soi_cdw_match==0
*      taxsim file  5   only includes observations with nonfiler==1 (and no restriction on soi_cdw_match)

merge 1:1 prim_tin using `soi'
	* -missing- keeps observations with a missing NONFILER in the cross-tab.
	* Without it, records present only in the SOI file (where NONFILER is
	* presumably missing; confirm against the inputs) are left out of the table.
	tab _merge nonfiler , missing
	* keep every nonfiler, and only those filers that matched to an SOI record
	keep if nonfiler==1 | (nonfiler==0 & _merge==3)
	drop _merge

* Add the CDW variables; every remaining record must match one-to-one
* NOTE(review): assert(match) also fails if the CDW file holds records that were
* dropped above; confirm this is the intended check.
merge 1:1 prim_tin using `cdw' , nogen assert(match)
* For nonfilers, take year of birth from the CDW_YOB variable merged in from the CDW file
replace soi_prim_yob = cdw_yob if nonfiler==1
drop cdw_yob


** add variables and save
* Federal income tax: balance due plus the withholding, extension-payment,
* estimated-payment and excess-SS-withholding fields, less SECA tax and late penalty
gen double soi_fed_incm_tax = soi_tax_balance_due + soi_tax_withheld + soi_tx_paid_w_extnsn ///
	+ soi_estmtd_tx_pymnts + soi_excess_sstax_withheld ///
	- soi_seca_tax - soi_late_penalty

* Number of dependents: slots 1-10 whose year of birth is neither missing nor zero
gen double soi_deps_tot = 0
forvalues slot = 1/10 {
	replace soi_deps_tot = soi_deps_tot + 1 if !(missing(soi_dep`slot'_yob) | soi_dep`slot'_yob==0)
}

compress
save "${statadir}/soi_cdw_taxsim_2019.dta", replace




* Close the log opened at the top of this file (no error if it is already closed)
capture log close


